dash_charts.pareto_chart⚓︎
Pareto Chart.
View Source
"""Pareto Chart."""
import pandas as pd
import plotly.graph_objects as go
from .utils_data import append_df, validate
from .utils_fig import CustomChart, check_raw_data
def tidy_pareto_data(df_raw, cap_categories):
    """Return compressed Pareto dataframe with one row per unique category.

    The `value` entries of each category are summed and the rows of each category
    are counted. Categories whose total is zero are dropped, the remainder is
    sorted by total (largest first) and truncated to `cap_categories` rows. The
    cumulative share is computed relative to the total of the rows that are kept.

    An empty `df_raw` yields an empty dataframe with the same four columns.

    Args:
        df_raw: pandas dataframe with at minimum the two columns `category: str` and `value: float`
        cap_categories: Maximum number of categories (bars)

    Returns:
        dataframe: pandas dataframe with columns `(label, value, counts, cum_per)`

    """
    # Aggregate in a single pass; `sort=False` keeps categories in order of first appearance
    grouped = df_raw.groupby('category', sort=False)['value']
    totals = grouped.sum()
    df_p = pd.DataFrame({
        'label': totals.index,
        'value': totals.to_numpy(),
        # `size` counts every row of the category, including rows whose value is missing
        'counts': grouped.size().to_numpy(),
    })

    # Sort and calculate percentage
    df_p = (
        df_p[df_p['value'] != 0]
        .sort_values(by=['value'], ascending=False)
        .head(cap_categories)
        .copy()  # Own the data before adding a column
    )
    df_p['cum_per'] = df_p['value'].divide(df_p['value'].sum()).cumsum()
    return df_p
class ParetoChart(CustomChart):
    """Pareto Chart: both bar and line graph chart for strategic decision making."""

    cap_categories: int = 20
    """Maximum number of categories (bars). Default is 20."""

    show_count: bool = True
    """If True, will show numeric count on each bar. Default is True."""

    yaxis_2_label: str = 'Cumulative Percentage'
    """Label for yaxis 2 that shows the cumulative percentage."""

    _pareto_colors: dict = {'bar': '#4682b4', 'line': '#b44646'}

    # Schema passed to `validate` whenever new colors are assigned
    _pareto_colors_schema = {
        'bar': {'required': True, 'type': 'string'},
        'line': {'required': True, 'type': 'string'},
    }

    @property
    def pareto_colors(self):
        """Colors used for the bar trace and the line trace.

        Returns:
            dict: dictionary with keys `(bar, line)`

        """
        return self._pareto_colors

    @pareto_colors.setter
    def pareto_colors(self, pareto_colors):
        problems = validate(pareto_colors, self._pareto_colors_schema)
        if not problems:
            # Only store the colors once they pass validation
            self._pareto_colors = pareto_colors
            return
        raise RuntimeError(f'Validation of self.pareto_colors failed: {problems}')

    def create_traces(self, df_raw):
        """Build the bar trace and the cumulative-percentage line trace.

        Args:
            df_raw: pandas dataframe with at minimum the two columns `category: str` and `value: float`

        Returns:
            list: Dash chart traces (the bar trace followed by the line trace)

        Raises:
            RuntimeError: if the `category` column of `df_raw` is not a string dtype

        """
        # Validate the raw data before doing any work
        check_raw_data(df_raw, min_keys=['category', 'value'])
        is_text = pd.api.types.is_string_dtype(df_raw['category'])
        if not is_text:  # pragma: no cover
            raise RuntimeError(f"category column must be string, but found {df_raw['category'].dtype}")

        df_p = tidy_pareto_data(df_raw, self.cap_categories)

        # The per-bar count labels are optional
        bar_kwargs = {
            'hoverinfo': 'y',
            'yaxis': 'y1',
            'name': 'raw_value',
            'marker': {'color': self.pareto_colors['bar']},
            'x': df_p['label'],
            'y': df_p['value'],
        }
        if self.show_count:
            bar_kwargs['text'] = df_p['counts']
            bar_kwargs['textposition'] = 'auto'
        bar_trace = go.Bar(**bar_kwargs)

        line_trace = go.Scatter(
            hoverinfo='y',
            yaxis='y2',
            name='cumulative_percentage',
            marker={'color': self.pareto_colors['line']},
            mode='lines',
            x=df_p['label'],
            y=df_p['cum_per'],
        )
        return [bar_trace, line_trace]

    def create_layout(self):
        """Extend the standard layout with a second y-axis for the cumulative percentage.

        Returns:
            dict: layout for Dash figure

        """
        layout = super().create_layout()
        for key, setting in (
            ('legend', {}),
            ('showlegend', False),
            ('margin', {'l': 75, 'b': 100, 't': 50, 'r': 125}),
        ):
            layout[key] = setting

        # Update YAxis configuration
        primary_axis = layout['yaxis']
        primary_axis['mirror'] = 'ticks'
        primary_axis['showline'] = True
        primary_axis['tickformat'] = '.0f'

        # See multiple axis: https://plot.ly/python/multiple-axes/
        secondary_axis = {
            'dtick': 0.1,
            'overlaying': 'y',
            'range': [0, 1.01],
            'showgrid': False,
            'side': 'right',
            'tickformat': '.0%',
            'tickmode': 'linear',
            'title': self.yaxis_2_label,
        }
        layout['yaxis2'] = secondary_axis
        return layout
Functions⚓︎
tidy_pareto_data⚓︎
def tidy_pareto_data(
df_raw,
cap_categories
)
Return compressed Pareto dataframe of only the unique values.
Parameters:
| Name | Description |
|---|---|
| df_raw | pandas dataframe with at minimum the two columns category: str and value: float |
| cap_categories | Maximum number of categories (bars) |
Returns:
| Type | Description |
|---|---|
| dataframe | pandas dataframe with columns (value, label, counts, cum_per) |
View Source
def tidy_pareto_data(df_raw, cap_categories):
    """Return compressed Pareto dataframe with one row per unique category.

    The `value` entries of each category are summed and the rows of each category
    are counted. Categories whose total is zero are dropped, the remainder is
    sorted by total (largest first) and truncated to `cap_categories` rows. The
    cumulative share is computed relative to the total of the rows that are kept.

    An empty `df_raw` yields an empty dataframe with the same four columns.

    Args:
        df_raw: pandas dataframe with at minimum the two columns `category: str` and `value: float`
        cap_categories: Maximum number of categories (bars)

    Returns:
        dataframe: pandas dataframe with columns `(label, value, counts, cum_per)`

    """
    # Aggregate in a single pass; `sort=False` keeps categories in order of first appearance
    grouped = df_raw.groupby('category', sort=False)['value']
    totals = grouped.sum()
    df_p = pd.DataFrame({
        'label': totals.index,
        'value': totals.to_numpy(),
        # `size` counts every row of the category, including rows whose value is missing
        'counts': grouped.size().to_numpy(),
    })

    # Sort and calculate percentage
    df_p = (
        df_p[df_p['value'] != 0]
        .sort_values(by=['value'], ascending=False)
        .head(cap_categories)
        .copy()  # Own the data before adding a column
    )
    df_p['cum_per'] = df_p['value'].divide(df_p['value'].sum()).cumsum()
    return df_p
Classes⚓︎
ParetoChart⚓︎
class ParetoChart(
*,
title,
xlabel,
ylabel,
layout_overrides=()
)
View Source
class ParetoChart(CustomChart):
    """Pareto Chart: both bar and line graph chart for strategic decision making."""

    cap_categories: int = 20
    """Maximum number of categories (bars). Default is 20."""

    show_count: bool = True
    """If True, will show numeric count on each bar. Default is True."""

    yaxis_2_label: str = 'Cumulative Percentage'
    """Label for yaxis 2 that shows the cumulative percentage."""

    _pareto_colors: dict = {'bar': '#4682b4', 'line': '#b44646'}

    # Schema passed to `validate` whenever new colors are assigned
    _pareto_colors_schema = {
        'bar': {'required': True, 'type': 'string'},
        'line': {'required': True, 'type': 'string'},
    }

    @property
    def pareto_colors(self):
        """Colors used for the bar trace and the line trace.

        Returns:
            dict: dictionary with keys `(bar, line)`

        """
        return self._pareto_colors

    @pareto_colors.setter
    def pareto_colors(self, pareto_colors):
        problems = validate(pareto_colors, self._pareto_colors_schema)
        if not problems:
            # Only store the colors once they pass validation
            self._pareto_colors = pareto_colors
            return
        raise RuntimeError(f'Validation of self.pareto_colors failed: {problems}')

    def create_traces(self, df_raw):
        """Build the bar trace and the cumulative-percentage line trace.

        Args:
            df_raw: pandas dataframe with at minimum the two columns `category: str` and `value: float`

        Returns:
            list: Dash chart traces (the bar trace followed by the line trace)

        Raises:
            RuntimeError: if the `category` column of `df_raw` is not a string dtype

        """
        # Validate the raw data before doing any work
        check_raw_data(df_raw, min_keys=['category', 'value'])
        is_text = pd.api.types.is_string_dtype(df_raw['category'])
        if not is_text:  # pragma: no cover
            raise RuntimeError(f"category column must be string, but found {df_raw['category'].dtype}")

        df_p = tidy_pareto_data(df_raw, self.cap_categories)

        # The per-bar count labels are optional
        bar_kwargs = {
            'hoverinfo': 'y',
            'yaxis': 'y1',
            'name': 'raw_value',
            'marker': {'color': self.pareto_colors['bar']},
            'x': df_p['label'],
            'y': df_p['value'],
        }
        if self.show_count:
            bar_kwargs['text'] = df_p['counts']
            bar_kwargs['textposition'] = 'auto'
        bar_trace = go.Bar(**bar_kwargs)

        line_trace = go.Scatter(
            hoverinfo='y',
            yaxis='y2',
            name='cumulative_percentage',
            marker={'color': self.pareto_colors['line']},
            mode='lines',
            x=df_p['label'],
            y=df_p['cum_per'],
        )
        return [bar_trace, line_trace]

    def create_layout(self):
        """Extend the standard layout with a second y-axis for the cumulative percentage.

        Returns:
            dict: layout for Dash figure

        """
        layout = super().create_layout()
        for key, setting in (
            ('legend', {}),
            ('showlegend', False),
            ('margin', {'l': 75, 'b': 100, 't': 50, 'r': 125}),
        ):
            layout[key] = setting

        # Update YAxis configuration
        primary_axis = layout['yaxis']
        primary_axis['mirror'] = 'ticks'
        primary_axis['showline'] = True
        primary_axis['tickformat'] = '.0f'

        # See multiple axis: https://plot.ly/python/multiple-axes/
        secondary_axis = {
            'dtick': 0.1,
            'overlaying': 'y',
            'range': [0, 1.01],
            'showgrid': False,
            'side': 'right',
            'tickformat': '.0%',
            'tickmode': 'linear',
            'title': self.yaxis_2_label,
        }
        layout['yaxis2'] = secondary_axis
        return layout
Ancestors (in MRO)⚓︎
- dash_charts.utils_fig.CustomChart
Class variables⚓︎
annotations
cap_categories
show_count
yaxis_2_label
Instance variables⚓︎
axis_range
Specify x/y axis range or leave as empty dictionary for autorange.
pareto_colors
Colors for bar and line traces in Pareto chart.
Methods⚓︎
apply_custom_layout⚓︎
def apply_custom_layout(
self,
layout
)
Extend and/or override layout with custom settings.
Parameters:
| Name | Description |
|---|---|
| layout | base layout dictionary. Typically from self.create_layout() |
Returns:
| Type | Description |
|---|---|
| dict | layout for Dash figure |
View Source
def apply_custom_layout(self, layout):
    """Apply every entry of `self.layout_overrides` to the given layout.

    Each override is a `(parent_key, sub_key, value)` triple. When `sub_key` is None the
    value replaces `layout[parent_key]`; otherwise it is written to
    `layout[parent_key][sub_key]`. The layout is modified in place.

    Args:
        layout: base layout dictionary. Typically from self.create_layout()

    Returns:
        dict: layout for Dash figure

    """
    for top_key, nested_key, setting in self.layout_overrides:
        if sub_key_missing := nested_key is None:
            # No nested key: replace the whole top-level entry
            layout[top_key] = setting
            continue
        layout[top_key][nested_key] = setting
    return layout
create_figure⚓︎
def create_figure(
self,
df_raw,
**kwargs_data
)
Create the figure dictionary.
Parameters:
| Name | Description |
|---|---|
| df_raw | data to pass to formatter method |
| kwargs_data | keyword arguments to pass to the data formatter method |
Returns:
| Type | Description |
|---|---|
| dict | keys data and layout for Dash |
View Source
def create_figure(self, df_raw, **kwargs_data):
    """Assemble the traces and the layout into one figure dictionary.

    Args:
        df_raw: data to pass to formatter method
        kwargs_data: keyword arguments to pass to the data formatter method

    Returns:
        dict: keys `data` and `layout` for Dash

    """
    # Traces are built first, then the layout is created and customized
    traces = self.create_traces(df_raw, **kwargs_data)
    layout_settings = self.apply_custom_layout(self.create_layout())
    figure = {'data': traces, 'layout': go.Layout(layout_settings)}
    return figure
create_layout⚓︎
def create_layout(
self
)
Extend the standard layout.
Returns:
| Type | Description |
|---|---|
| dict | layout for Dash figure |
View Source
def create_layout(self):
    """Extend the standard layout with a second y-axis for the cumulative percentage.

    Returns:
        dict: layout for Dash figure

    """
    layout = super().create_layout()
    for key, setting in (
        ('legend', {}),
        ('showlegend', False),
        ('margin', {'l': 75, 'b': 100, 't': 50, 'r': 125}),
    ):
        layout[key] = setting

    # Update YAxis configuration
    primary_axis = layout['yaxis']
    primary_axis['mirror'] = 'ticks'
    primary_axis['showline'] = True
    primary_axis['tickformat'] = '.0f'

    # See multiple axis: https://plot.ly/python/multiple-axes/
    secondary_axis = {
        'dtick': 0.1,
        'overlaying': 'y',
        'range': [0, 1.01],
        'showgrid': False,
        'side': 'right',
        'tickformat': '.0%',
        'tickmode': 'linear',
        'title': self.yaxis_2_label,
    }
    layout['yaxis2'] = secondary_axis
    return layout
create_traces⚓︎
def create_traces(
self,
df_raw
)
Return traces for plotly chart.
Parameters:
| Name | Description |
|---|---|
| df_raw | pandas dataframe with at minimum the two columns category: str and value: float |
Returns:
| Type | Description |
|---|---|
| list | Dash chart traces |
Raises:
| Type | Description |
|---|---|
| RuntimeError | if the df_raw is missing any necessary columns |
View Source
def create_traces(self, df_raw):
    """Build the bar trace and the cumulative-percentage line trace.

    Args:
        df_raw: pandas dataframe with at minimum the two columns `category: str` and `value: float`

    Returns:
        list: Dash chart traces (the bar trace followed by the line trace)

    Raises:
        RuntimeError: if the `category` column of `df_raw` is not a string dtype

    """
    # Validate the raw data before doing any work
    check_raw_data(df_raw, min_keys=['category', 'value'])
    is_text = pd.api.types.is_string_dtype(df_raw['category'])
    if not is_text:  # pragma: no cover
        raise RuntimeError(f"category column must be string, but found {df_raw['category'].dtype}")

    df_p = tidy_pareto_data(df_raw, self.cap_categories)

    # The per-bar count labels are optional
    bar_kwargs = {
        'hoverinfo': 'y',
        'yaxis': 'y1',
        'name': 'raw_value',
        'marker': {'color': self.pareto_colors['bar']},
        'x': df_p['label'],
        'y': df_p['value'],
    }
    if self.show_count:
        bar_kwargs['text'] = df_p['counts']
        bar_kwargs['textposition'] = 'auto'
    bar_trace = go.Bar(**bar_kwargs)

    line_trace = go.Scatter(
        hoverinfo='y',
        yaxis='y2',
        name='cumulative_percentage',
        marker={'color': self.pareto_colors['line']},
        mode='lines',
        x=df_p['label'],
        y=df_p['cum_per'],
    )
    return [bar_trace, line_trace]
initialize_mutables⚓︎
def initialize_mutables(
self
)
Initialize the mutable data members to prevent modifying one attribute and impacting all instances.
View Source
def initialize_mutables(self):
    """Initialize the mutable data members to prevent modifying one attribute and impacting all instances.

    This implementation does no work and returns None.

    """
    return None
Last update: August 5, 2022
Created: August 5, 2022